import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
df = pd.read_csv("hotel_bookings.csv")
df.head(5)
| hotel | is_canceled | lead_time | arrival_date_year | arrival_date_month | arrival_date_week_number | arrival_date_day_of_month | stays_in_weekend_nights | stays_in_week_nights | adults | ... | deposit_type | agent | company | days_in_waiting_list | customer_type | adr | required_car_parking_spaces | total_of_special_requests | reservation_status | reservation_status_date | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Resort Hotel | 0 | 342 | 2015 | July | 27 | 1 | 0 | 0 | 2 | ... | No Deposit | NaN | NaN | 0 | Transient | 0.0 | 0 | 0 | Check-Out | 7/1/2015 |
| 1 | Resort Hotel | 0 | 737 | 2015 | July | 27 | 1 | 0 | 0 | 2 | ... | No Deposit | NaN | NaN | 0 | Transient | 0.0 | 0 | 0 | Check-Out | 7/1/2015 |
| 2 | Resort Hotel | 0 | 7 | 2015 | July | 27 | 1 | 0 | 1 | 1 | ... | No Deposit | NaN | NaN | 0 | Transient | 75.0 | 0 | 0 | Check-Out | 7/2/2015 |
| 3 | Resort Hotel | 0 | 13 | 2015 | July | 27 | 1 | 0 | 1 | 1 | ... | No Deposit | 304.0 | NaN | 0 | Transient | 75.0 | 0 | 0 | Check-Out | 7/2/2015 |
| 4 | Resort Hotel | 0 | 14 | 2015 | July | 27 | 1 | 0 | 2 | 2 | ... | No Deposit | 240.0 | NaN | 0 | Transient | 98.0 | 0 | 1 | Check-Out | 7/3/2015 |
5 rows × 32 columns
df.shape
(119390, 32)
df.columns
Index(['hotel', 'is_canceled', 'lead_time', 'arrival_date_year',
'arrival_date_month', 'arrival_date_week_number',
'arrival_date_day_of_month', 'stays_in_weekend_nights',
'stays_in_week_nights', 'adults', 'children', 'babies', 'meal',
'country', 'market_segment', 'distribution_channel',
'is_repeated_guest', 'previous_cancellations',
'previous_bookings_not_canceled', 'reserved_room_type',
'assigned_room_type', 'booking_changes', 'deposit_type', 'agent',
'company', 'days_in_waiting_list', 'customer_type', 'adr',
'required_car_parking_spaces', 'total_of_special_requests',
'reservation_status', 'reservation_status_date'],
dtype='object')
df.isnull().values.any()
True
df.isnull().sum()
hotel 0 is_canceled 0 lead_time 0 arrival_date_year 0 arrival_date_month 0 arrival_date_week_number 0 arrival_date_day_of_month 0 stays_in_weekend_nights 0 stays_in_week_nights 0 adults 0 children 4 babies 0 meal 0 country 488 market_segment 0 distribution_channel 0 is_repeated_guest 0 previous_cancellations 0 previous_bookings_not_canceled 0 reserved_room_type 0 assigned_room_type 0 booking_changes 0 deposit_type 0 agent 16340 company 112593 days_in_waiting_list 0 customer_type 0 adr 0 required_car_parking_spaces 0 total_of_special_requests 0 reservation_status 0 reservation_status_date 0 dtype: int64
df.fillna(0, inplace = True)
df.isnull().sum()
hotel 0 is_canceled 0 lead_time 0 arrival_date_year 0 arrival_date_month 0 arrival_date_week_number 0 arrival_date_day_of_month 0 stays_in_weekend_nights 0 stays_in_week_nights 0 adults 0 children 0 babies 0 meal 0 country 0 market_segment 0 distribution_channel 0 is_repeated_guest 0 previous_cancellations 0 previous_bookings_not_canceled 0 reserved_room_type 0 assigned_room_type 0 booking_changes 0 deposit_type 0 agent 0 company 0 days_in_waiting_list 0 customer_type 0 adr 0 required_car_parking_spaces 0 total_of_special_requests 0 reservation_status 0 reservation_status_date 0 dtype: int64
df['meal'].value_counts()
meal BB 92310 HB 14463 SC 10650 Undefined 1169 FB 798 Name: count, dtype: int64
df['children'].value_counts()
children 0.0 110800 1.0 4861 2.0 3652 3.0 76 10.0 1 Name: count, dtype: int64
df['adults'].unique()
array([ 2, 1, 3, 4, 40, 26, 50, 27, 55, 0, 20, 6, 5, 10],
dtype=int64)
df['babies'].unique()
array([ 0, 1, 2, 10, 9], dtype=int64)
len(df[df['adults'] == 0])
403
len(df[df['babies'] == 0])
118473
filter = (df['children'] == 0) & (df['adults'] == 0) & (df['babies'] == 0)
df[filter]
| hotel | is_canceled | lead_time | arrival_date_year | arrival_date_month | arrival_date_week_number | arrival_date_day_of_month | stays_in_weekend_nights | stays_in_week_nights | adults | ... | deposit_type | agent | company | days_in_waiting_list | customer_type | adr | required_car_parking_spaces | total_of_special_requests | reservation_status | reservation_status_date | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2224 | Resort Hotel | 0 | 1 | 2015 | October | 41 | 6 | 0 | 3 | 0 | ... | No Deposit | 0.0 | 174.0 | 0 | Transient-Party | 0.00 | 0 | 0 | Check-Out | 10/6/2015 |
| 2409 | Resort Hotel | 0 | 0 | 2015 | October | 42 | 12 | 0 | 0 | 0 | ... | No Deposit | 0.0 | 174.0 | 0 | Transient | 0.00 | 0 | 0 | Check-Out | 10/12/2015 |
| 3181 | Resort Hotel | 0 | 36 | 2015 | November | 47 | 20 | 1 | 2 | 0 | ... | No Deposit | 38.0 | 0.0 | 0 | Transient-Party | 0.00 | 0 | 0 | Check-Out | 11/23/2015 |
| 3684 | Resort Hotel | 0 | 165 | 2015 | December | 53 | 30 | 1 | 4 | 0 | ... | No Deposit | 308.0 | 0.0 | 122 | Transient-Party | 0.00 | 0 | 0 | Check-Out | 1/4/2016 |
| 3708 | Resort Hotel | 0 | 165 | 2015 | December | 53 | 30 | 2 | 4 | 0 | ... | No Deposit | 308.0 | 0.0 | 122 | Transient-Party | 0.00 | 0 | 0 | Check-Out | 1/5/2016 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 115029 | City Hotel | 0 | 107 | 2017 | June | 26 | 27 | 0 | 3 | 0 | ... | No Deposit | 7.0 | 0.0 | 0 | Transient | 100.80 | 0 | 0 | Check-Out | 6/30/2017 |
| 115091 | City Hotel | 0 | 1 | 2017 | June | 26 | 30 | 0 | 1 | 0 | ... | No Deposit | 0.0 | 0.0 | 0 | Transient | 0.00 | 1 | 1 | Check-Out | 7/1/2017 |
| 116251 | City Hotel | 0 | 44 | 2017 | July | 28 | 15 | 1 | 1 | 0 | ... | No Deposit | 425.0 | 0.0 | 0 | Transient | 73.80 | 0 | 0 | Check-Out | 7/17/2017 |
| 116534 | City Hotel | 0 | 2 | 2017 | July | 28 | 15 | 2 | 5 | 0 | ... | No Deposit | 9.0 | 0.0 | 0 | Transient-Party | 22.86 | 0 | 1 | Check-Out | 7/22/2017 |
| 117087 | City Hotel | 0 | 170 | 2017 | July | 30 | 27 | 0 | 2 | 0 | ... | No Deposit | 52.0 | 0.0 | 0 | Transient | 0.00 | 0 | 0 | Check-Out | 7/29/2017 |
180 rows × 32 columns
data = df[~filter]
data.shape
(119210, 32)
df['hotel'].unique()
array(['Resort Hotel', 'City Hotel'], dtype=object)
df.head(10)
| hotel | is_canceled | lead_time | arrival_date_year | arrival_date_month | arrival_date_week_number | arrival_date_day_of_month | stays_in_weekend_nights | stays_in_week_nights | adults | ... | deposit_type | agent | company | days_in_waiting_list | customer_type | adr | required_car_parking_spaces | total_of_special_requests | reservation_status | reservation_status_date | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Resort Hotel | 0 | 342 | 2015 | July | 27 | 1 | 0 | 0 | 2 | ... | No Deposit | 0.0 | 0.0 | 0 | Transient | 0.0 | 0 | 0 | Check-Out | 7/1/2015 |
| 1 | Resort Hotel | 0 | 737 | 2015 | July | 27 | 1 | 0 | 0 | 2 | ... | No Deposit | 0.0 | 0.0 | 0 | Transient | 0.0 | 0 | 0 | Check-Out | 7/1/2015 |
| 2 | Resort Hotel | 0 | 7 | 2015 | July | 27 | 1 | 0 | 1 | 1 | ... | No Deposit | 0.0 | 0.0 | 0 | Transient | 75.0 | 0 | 0 | Check-Out | 7/2/2015 |
| 3 | Resort Hotel | 0 | 13 | 2015 | July | 27 | 1 | 0 | 1 | 1 | ... | No Deposit | 304.0 | 0.0 | 0 | Transient | 75.0 | 0 | 0 | Check-Out | 7/2/2015 |
| 4 | Resort Hotel | 0 | 14 | 2015 | July | 27 | 1 | 0 | 2 | 2 | ... | No Deposit | 240.0 | 0.0 | 0 | Transient | 98.0 | 0 | 1 | Check-Out | 7/3/2015 |
| 5 | Resort Hotel | 0 | 14 | 2015 | July | 27 | 1 | 0 | 2 | 2 | ... | No Deposit | 240.0 | 0.0 | 0 | Transient | 98.0 | 0 | 1 | Check-Out | 7/3/2015 |
| 6 | Resort Hotel | 0 | 0 | 2015 | July | 27 | 1 | 0 | 2 | 2 | ... | No Deposit | 0.0 | 0.0 | 0 | Transient | 107.0 | 0 | 0 | Check-Out | 7/3/2015 |
| 7 | Resort Hotel | 0 | 9 | 2015 | July | 27 | 1 | 0 | 2 | 2 | ... | No Deposit | 303.0 | 0.0 | 0 | Transient | 103.0 | 0 | 1 | Check-Out | 7/3/2015 |
| 8 | Resort Hotel | 1 | 85 | 2015 | July | 27 | 1 | 0 | 3 | 2 | ... | No Deposit | 240.0 | 0.0 | 0 | Transient | 82.0 | 0 | 1 | Canceled | 5/6/2015 |
| 9 | Resort Hotel | 1 | 75 | 2015 | July | 27 | 1 | 0 | 3 | 2 | ... | No Deposit | 15.0 | 0.0 | 0 | Transient | 105.5 | 0 | 0 | Canceled | 4/22/2015 |
10 rows × 32 columns
data.head(2)
| hotel | is_canceled | lead_time | arrival_date_year | arrival_date_month | arrival_date_week_number | arrival_date_day_of_month | stays_in_weekend_nights | stays_in_week_nights | adults | ... | deposit_type | agent | company | days_in_waiting_list | customer_type | adr | required_car_parking_spaces | total_of_special_requests | reservation_status | reservation_status_date | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Resort Hotel | 0 | 342 | 2015 | July | 27 | 1 | 0 | 0 | 2 | ... | No Deposit | 0.0 | 0.0 | 0 | Transient | 0.0 | 0 | 0 | Check-Out | 7/1/2015 |
| 1 | Resort Hotel | 0 | 737 | 2015 | July | 27 | 1 | 0 | 0 | 2 | ... | No Deposit | 0.0 | 0.0 | 0 | Transient | 0.0 | 0 | 0 | Check-Out | 7/1/2015 |
2 rows × 32 columns
data['hotel'].unique()
array(['Resort Hotel', 'City Hotel'], dtype=object)
data['is_canceled'].unique()
array([0, 1], dtype=int64)
resort = data[(data['hotel'] == 'Resort Hotel') & (data['is_canceled'] == 0)]
City = data[(data['hotel'] == 'City Hotel') & (data['is_canceled'] == 0)]
resort.shape
(28927, 32)
City.shape
(46084, 32)
resort['country'].value_counts()
country
PRT 10184
GBR 5922
ESP 3105
IRL 1734
FRA 1399
...
UGA 1
COM 1
MUS 1
BIH 1
SAU 1
Name: count, Length: 119, dtype: int64
resort['country'].value_counts().index
Index(['PRT', 'GBR', 'ESP', 'IRL', 'FRA', 'DEU', 'CN', 'NLD', 0, 'USA',
...
'MKD', 'SMR', 'BDI', 'SYR', 'CYM', 'UGA', 'COM', 'MUS', 'BIH', 'SAU'],
dtype='object', name='country', length=119)
lables = resort['country'].value_counts().index
values = resort['country'].value_counts()
import plotly.graph_objs as go
from plotly.offline import iplot
import plotly.express as px
trace=go.Pie(labels=lables, values=values,
hoverinfo='label+percent', textinfo='value'
)
iplot([trace])
data.columns
Index(['hotel', 'is_canceled', 'lead_time', 'arrival_date_year',
'arrival_date_month', 'arrival_date_week_number',
'arrival_date_day_of_month', 'stays_in_weekend_nights',
'stays_in_week_nights', 'adults', 'children', 'babies', 'meal',
'country', 'market_segment', 'distribution_channel',
'is_repeated_guest', 'previous_cancellations',
'previous_bookings_not_canceled', 'reserved_room_type',
'assigned_room_type', 'booking_changes', 'deposit_type', 'agent',
'company', 'days_in_waiting_list', 'customer_type', 'adr',
'required_car_parking_spaces', 'total_of_special_requests',
'reservation_status', 'reservation_status_date'],
dtype='object')
country_wise = data[data['is_canceled'] == 0]['country'].value_counts().reset_index()
country_wise.columns = ['country', 'No of guests']
country_wise
| country | No of guests | |
|---|---|---|
| 0 | PRT | 20977 |
| 1 | GBR | 9668 |
| 2 | FRA | 8468 |
| 3 | ESP | 6383 |
| 4 | DEU | 6067 |
| ... | ... | ... |
| 161 | BHR | 1 |
| 162 | DJI | 1 |
| 163 | MLI | 1 |
| 164 | NPL | 1 |
| 165 | FRO | 1 |
166 rows × 2 columns
!pip install folium
Requirement already satisfied: folium in c:\users\abhis\anaconda3\lib\site-packages (0.17.0) Requirement already satisfied: branca>=0.6.0 in c:\users\abhis\anaconda3\lib\site-packages (from folium) (0.7.2) Requirement already satisfied: jinja2>=2.9 in c:\users\abhis\anaconda3\lib\site-packages (from folium) (3.1.3) Requirement already satisfied: numpy in c:\users\abhis\anaconda3\lib\site-packages (from folium) (1.26.4) Requirement already satisfied: requests in c:\users\abhis\anaconda3\lib\site-packages (from folium) (2.31.0) Requirement already satisfied: xyzservices in c:\users\abhis\anaconda3\lib\site-packages (from folium) (2022.9.0) Requirement already satisfied: MarkupSafe>=2.0 in c:\users\abhis\anaconda3\lib\site-packages (from jinja2>=2.9->folium) (2.1.3) Requirement already satisfied: charset-normalizer<4,>=2 in c:\users\abhis\anaconda3\lib\site-packages (from requests->folium) (2.0.4) Requirement already satisfied: idna<4,>=2.5 in c:\users\abhis\anaconda3\lib\site-packages (from requests->folium) (3.4) Requirement already satisfied: urllib3<3,>=1.21.1 in c:\users\abhis\anaconda3\lib\site-packages (from requests->folium) (2.0.7) Requirement already satisfied: certifi>=2017.4.17 in c:\users\abhis\anaconda3\lib\site-packages (from requests->folium) (2024.6.2)
# map graph
import folium
from folium.plugins import HeatMap
b = folium.Map()
country_wise.dtypes
country object No of guests int64 dtype: object
gueats = px.choropleth(country_wise,
locations = country_wise['country'],
color = country_wise['No of guests'],
hover_name = country_wise['country'],
title = "Home country of gueats"
)
gueats.show()
!pip install sort-dataframeby-monthorweek
Requirement already satisfied: sort-dataframeby-monthorweek in c:\users\abhis\anaconda3\lib\site-packages (0.4)
!pip install sorted-months-weekdays
Requirement already satisfied: sorted-months-weekdays in c:\users\abhis\anaconda3\lib\site-packages (0.2)
data.head()
| hotel | is_canceled | lead_time | arrival_date_year | arrival_date_month | arrival_date_week_number | arrival_date_day_of_month | stays_in_weekend_nights | stays_in_week_nights | adults | ... | deposit_type | agent | company | days_in_waiting_list | customer_type | adr | required_car_parking_spaces | total_of_special_requests | reservation_status | reservation_status_date | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Resort Hotel | 0 | 342 | 2015 | July | 27 | 1 | 0 | 0 | 2 | ... | No Deposit | 0.0 | 0.0 | 0 | Transient | 0.0 | 0 | 0 | Check-Out | 7/1/2015 |
| 1 | Resort Hotel | 0 | 737 | 2015 | July | 27 | 1 | 0 | 0 | 2 | ... | No Deposit | 0.0 | 0.0 | 0 | Transient | 0.0 | 0 | 0 | Check-Out | 7/1/2015 |
| 2 | Resort Hotel | 0 | 7 | 2015 | July | 27 | 1 | 0 | 1 | 1 | ... | No Deposit | 0.0 | 0.0 | 0 | Transient | 75.0 | 0 | 0 | Check-Out | 7/2/2015 |
| 3 | Resort Hotel | 0 | 13 | 2015 | July | 27 | 1 | 0 | 1 | 1 | ... | No Deposit | 304.0 | 0.0 | 0 | Transient | 75.0 | 0 | 0 | Check-Out | 7/2/2015 |
| 4 | Resort Hotel | 0 | 14 | 2015 | July | 27 | 1 | 0 | 2 | 2 | ... | No Deposit | 240.0 | 0.0 | 0 | Transient | 98.0 | 0 | 1 | Check-Out | 7/3/2015 |
5 rows × 32 columns
# How much do guests pay for a room per night?
data2 = data[data['is_canceled'] == 0]
data2.columns
Index(['hotel', 'is_canceled', 'lead_time', 'arrival_date_year',
'arrival_date_month', 'arrival_date_week_number',
'arrival_date_day_of_month', 'stays_in_weekend_nights',
'stays_in_week_nights', 'adults', 'children', 'babies', 'meal',
'country', 'market_segment', 'distribution_channel',
'is_repeated_guest', 'previous_cancellations',
'previous_bookings_not_canceled', 'reserved_room_type',
'assigned_room_type', 'booking_changes', 'deposit_type', 'agent',
'company', 'days_in_waiting_list', 'customer_type', 'adr',
'required_car_parking_spaces', 'total_of_special_requests',
'reservation_status', 'reservation_status_date'],
dtype='object')
plt.figure(figsize=(12, 8))
sns.boxplot(x = 'reserved_room_type',
y = 'adr',
hue = 'hotel',
data = data2)
plt.title("Price of room types per night and person")
plt.xlabel("Room Type")
plt.ylabel("Price [EUR]")
plt.legend(loc = 'upper right')
plt.ylim(0, 600)
plt.show()
# How does the price per night vary over the year?
data_resort = resort[resort['is_canceled'] == 0]
data_city = City[City['is_canceled'] == 0]
data_city
| hotel | is_canceled | lead_time | arrival_date_year | arrival_date_month | arrival_date_week_number | arrival_date_day_of_month | stays_in_weekend_nights | stays_in_week_nights | adults | ... | deposit_type | agent | company | days_in_waiting_list | customer_type | adr | required_car_parking_spaces | total_of_special_requests | reservation_status | reservation_status_date | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 40060 | City Hotel | 0 | 6 | 2015 | July | 27 | 1 | 0 | 2 | 1 | ... | No Deposit | 6.0 | 0.0 | 0 | Transient | 0.00 | 0 | 0 | Check-Out | 7/3/2015 |
| 40066 | City Hotel | 0 | 3 | 2015 | July | 27 | 2 | 0 | 3 | 1 | ... | No Deposit | 1.0 | 0.0 | 0 | Transient-Party | 58.67 | 0 | 0 | Check-Out | 7/5/2015 |
| 40070 | City Hotel | 0 | 43 | 2015 | July | 27 | 3 | 0 | 2 | 2 | ... | No Deposit | 1.0 | 0.0 | 0 | Transient-Party | 86.00 | 0 | 0 | Check-Out | 7/5/2015 |
| 40071 | City Hotel | 0 | 43 | 2015 | July | 27 | 3 | 0 | 2 | 2 | ... | No Deposit | 1.0 | 0.0 | 0 | Transient-Party | 43.00 | 0 | 0 | Check-Out | 7/5/2015 |
| 40072 | City Hotel | 0 | 43 | 2015 | July | 27 | 3 | 0 | 2 | 2 | ... | No Deposit | 1.0 | 0.0 | 0 | Transient-Party | 86.00 | 0 | 0 | Check-Out | 7/5/2015 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 119385 | City Hotel | 0 | 23 | 2017 | August | 35 | 30 | 2 | 5 | 2 | ... | No Deposit | 394.0 | 0.0 | 0 | Transient | 96.14 | 0 | 0 | Check-Out | 9/6/2017 |
| 119386 | City Hotel | 0 | 102 | 2017 | August | 35 | 31 | 2 | 5 | 3 | ... | No Deposit | 9.0 | 0.0 | 0 | Transient | 225.43 | 0 | 2 | Check-Out | 9/7/2017 |
| 119387 | City Hotel | 0 | 34 | 2017 | August | 35 | 31 | 2 | 5 | 2 | ... | No Deposit | 9.0 | 0.0 | 0 | Transient | 157.71 | 0 | 4 | Check-Out | 9/7/2017 |
| 119388 | City Hotel | 0 | 109 | 2017 | August | 35 | 31 | 2 | 5 | 2 | ... | No Deposit | 89.0 | 0.0 | 0 | Transient | 104.40 | 0 | 0 | Check-Out | 9/7/2017 |
| 119389 | City Hotel | 0 | 205 | 2017 | August | 35 | 29 | 2 | 7 | 2 | ... | No Deposit | 9.0 | 0.0 | 0 | Transient | 151.20 | 0 | 2 | Check-Out | 9/7/2017 |
46084 rows × 32 columns
data_resort
| hotel | is_canceled | lead_time | arrival_date_year | arrival_date_month | arrival_date_week_number | arrival_date_day_of_month | stays_in_weekend_nights | stays_in_week_nights | adults | ... | deposit_type | agent | company | days_in_waiting_list | customer_type | adr | required_car_parking_spaces | total_of_special_requests | reservation_status | reservation_status_date | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Resort Hotel | 0 | 342 | 2015 | July | 27 | 1 | 0 | 0 | 2 | ... | No Deposit | 0.0 | 0.0 | 0 | Transient | 0.00 | 0 | 0 | Check-Out | 7/1/2015 |
| 1 | Resort Hotel | 0 | 737 | 2015 | July | 27 | 1 | 0 | 0 | 2 | ... | No Deposit | 0.0 | 0.0 | 0 | Transient | 0.00 | 0 | 0 | Check-Out | 7/1/2015 |
| 2 | Resort Hotel | 0 | 7 | 2015 | July | 27 | 1 | 0 | 1 | 1 | ... | No Deposit | 0.0 | 0.0 | 0 | Transient | 75.00 | 0 | 0 | Check-Out | 7/2/2015 |
| 3 | Resort Hotel | 0 | 13 | 2015 | July | 27 | 1 | 0 | 1 | 1 | ... | No Deposit | 304.0 | 0.0 | 0 | Transient | 75.00 | 0 | 0 | Check-Out | 7/2/2015 |
| 4 | Resort Hotel | 0 | 14 | 2015 | July | 27 | 1 | 0 | 2 | 2 | ... | No Deposit | 240.0 | 0.0 | 0 | Transient | 98.00 | 0 | 1 | Check-Out | 7/3/2015 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 40055 | Resort Hotel | 0 | 212 | 2017 | August | 35 | 31 | 2 | 8 | 2 | ... | No Deposit | 143.0 | 0.0 | 0 | Transient | 89.75 | 0 | 0 | Check-Out | 9/10/2017 |
| 40056 | Resort Hotel | 0 | 169 | 2017 | August | 35 | 30 | 2 | 9 | 2 | ... | No Deposit | 250.0 | 0.0 | 0 | Transient-Party | 202.27 | 0 | 1 | Check-Out | 9/10/2017 |
| 40057 | Resort Hotel | 0 | 204 | 2017 | August | 35 | 29 | 4 | 10 | 2 | ... | No Deposit | 250.0 | 0.0 | 0 | Transient | 153.57 | 0 | 3 | Check-Out | 9/12/2017 |
| 40058 | Resort Hotel | 0 | 211 | 2017 | August | 35 | 31 | 4 | 10 | 2 | ... | No Deposit | 40.0 | 0.0 | 0 | Contract | 112.80 | 0 | 1 | Check-Out | 9/14/2017 |
| 40059 | Resort Hotel | 0 | 161 | 2017 | August | 35 | 31 | 4 | 10 | 2 | ... | No Deposit | 69.0 | 0.0 | 0 | Transient | 99.06 | 0 | 0 | Check-Out | 9/14/2017 |
28927 rows × 32 columns
resort_hotel = data_resort.groupby(['arrival_date_month'])['adr'].mean().reset_index()
resort_hotel
| arrival_date_month | adr | |
|---|---|---|
| 0 | April | 75.867816 |
| 1 | August | 181.205892 |
| 2 | December | 68.410104 |
| 3 | February | 54.147478 |
| 4 | January | 48.761125 |
| 5 | July | 150.122528 |
| 6 | June | 107.974850 |
| 7 | March | 57.056838 |
| 8 | May | 76.657558 |
| 9 | November | 48.706289 |
| 10 | October | 61.775449 |
| 11 | September | 96.416860 |
city_hotel = data_city.groupby(['arrival_date_month'])['adr'].mean().reset_index()
city_hotel
| arrival_date_month | adr | |
|---|---|---|
| 0 | April | 111.962267 |
| 1 | August | 118.674598 |
| 2 | December | 88.401855 |
| 3 | February | 86.520062 |
| 4 | January | 82.330983 |
| 5 | July | 115.818019 |
| 6 | June | 117.874360 |
| 7 | March | 90.658533 |
| 8 | May | 120.669827 |
| 9 | November | 86.946592 |
| 10 | October | 102.004672 |
| 11 | September | 112.776582 |
final=resort_hotel.merge(city_hotel,on='arrival_date_month')
final.columns=['month','price_for_resort','price_for_city_hotel']
final
| month | price_for_resort | price_for_city_hotel | |
|---|---|---|---|
| 0 | April | 75.867816 | 111.962267 |
| 1 | August | 181.205892 | 118.674598 |
| 2 | December | 68.410104 | 88.401855 |
| 3 | February | 54.147478 | 86.520062 |
| 4 | January | 48.761125 | 82.330983 |
| 5 | July | 150.122528 | 115.818019 |
| 6 | June | 107.974850 | 117.874360 |
| 7 | March | 57.056838 | 90.658533 |
| 8 | May | 76.657558 | 120.669827 |
| 9 | November | 48.706289 | 86.946592 |
| 10 | October | 61.775449 | 102.004672 |
| 11 | September | 96.416860 | 112.776582 |
import sort_dataframeby_monthorweek as sd
final = sd.Sort_Dataframeby_Month(df = final, monthcolumnname = 'month')
final
| month | price_for_resort | price_for_city_hotel | |
|---|---|---|---|
| 0 | January | 48.761125 | 82.330983 |
| 1 | February | 54.147478 | 86.520062 |
| 2 | March | 57.056838 | 90.658533 |
| 3 | April | 75.867816 | 111.962267 |
| 4 | May | 76.657558 | 120.669827 |
| 5 | June | 107.974850 | 117.874360 |
| 6 | July | 150.122528 | 115.818019 |
| 7 | August | 181.205892 | 118.674598 |
| 8 | September | 96.416860 | 112.776582 |
| 9 | October | 61.775449 | 102.004672 |
| 10 | November | 48.706289 | 86.946592 |
| 11 | December | 68.410104 | 88.401855 |
px.line(final, x = 'month', y=['price_for_resort', 'price_for_city_hotel'], title ='Room price per night over the month' )
data.columns
Index(['hotel', 'is_canceled', 'lead_time', 'arrival_date_year',
'arrival_date_month', 'arrival_date_week_number',
'arrival_date_day_of_month', 'stays_in_weekend_nights',
'stays_in_week_nights', 'adults', 'children', 'babies', 'meal',
'country', 'market_segment', 'distribution_channel',
'is_repeated_guest', 'previous_cancellations',
'previous_bookings_not_canceled', 'reserved_room_type',
'assigned_room_type', 'booking_changes', 'deposit_type', 'agent',
'company', 'days_in_waiting_list', 'customer_type', 'adr',
'required_car_parking_spaces', 'total_of_special_requests',
'reservation_status', 'reservation_status_date'],
dtype='object')
data['market_segment'].value_counts()
market_segment Online TA 56408 Offline TA/TO 24182 Groups 19791 Direct 12582 Corporate 5282 Complementary 728 Aviation 235 Undefined 2 Name: count, dtype: int64
plt.figure(figsize = (8,8))
sns.boxplot(x = 'market_segment',
y = 'stays_in_weekend_nights',
hue = 'hotel',
data = data)
<Axes: xlabel='market_segment', ylabel='stays_in_weekend_nights'>
# Analysing Preference of Guests, what they basically Prefer in meal?
data['meal'].value_counts()
meal BB 92236 HB 14458 SC 10549 Undefined 1169 FB 798 Name: count, dtype: int64
px.pie(data, names = data['meal'].value_counts().index,
values = data['meal'].value_counts().values, hole = 0.5)
data.groupby(['hotel', 'meal']).agg({'meal':'count'}).unstack()
| meal | |||||
|---|---|---|---|---|---|
| meal | BB | FB | HB | SC | Undefined |
| hotel | |||||
| City Hotel | 62233.0 | 44.0 | 6412.0 | 10474.0 | NaN |
| Resort Hotel | 30003.0 | 754.0 | 8046.0 | 75.0 | 1169.0 |
# Analyse Special Request done by Cuustomers
data.columns
Index(['hotel', 'is_canceled', 'lead_time', 'arrival_date_year',
'arrival_date_month', 'arrival_date_week_number',
'arrival_date_day_of_month', 'stays_in_weekend_nights',
'stays_in_week_nights', 'adults', 'children', 'babies', 'meal',
'country', 'market_segment', 'distribution_channel',
'is_repeated_guest', 'previous_cancellations',
'previous_bookings_not_canceled', 'reserved_room_type',
'assigned_room_type', 'booking_changes', 'deposit_type', 'agent',
'company', 'days_in_waiting_list', 'customer_type', 'adr',
'required_car_parking_spaces', 'total_of_special_requests',
'reservation_status', 'reservation_status_date'],
dtype='object')
sns.countplot(x = 'total_of_special_requests', data = data)
plt.title("Total Special Request")
Text(0.5, 1.0, 'Total Special Request')
sns.countplot(x = 'total_of_special_requests', data = data, hue = 'hotel')
plt.title("Total Special Request")
Text(0.5, 1.0, 'Total Special Request')
data.groupby(['total_of_special_requests',
'is_canceled']).agg({'total_of_special_requests':'count'})
| total_of_special_requests | ||
|---|---|---|
| total_of_special_requests | is_canceled | |
| 0 | 0 | 36667 |
| 1 | 33534 | |
| 1 | 0 | 25867 |
| 1 | 7316 | |
| 2 | 0 | 10086 |
| 1 | 2866 | |
| 3 | 0 | 2049 |
| 1 | 445 | |
| 4 | 0 | 304 |
| 1 | 36 | |
| 5 | 0 | 38 |
| 1 | 2 |
pivot = data.groupby(['total_of_special_requests',
'is_canceled']).agg({'total_of_special_requests':'count'}).rename(columns = {'total_of_special_requests':'count'}).unstack()
pivot
| count | ||
|---|---|---|
| is_canceled | 0 | 1 |
| total_of_special_requests | ||
| 0 | 36667 | 33534 |
| 1 | 25867 | 7316 |
| 2 | 10086 | 2866 |
| 3 | 2049 | 445 |
| 4 | 304 | 36 |
| 5 | 38 | 2 |
pivot.plot(kind= 'bar')
<Axes: xlabel='total_of_special_requests'>
# Which are the most busy month or in which months Guests are high?
data_resort.head()
| hotel | is_canceled | lead_time | arrival_date_year | arrival_date_month | arrival_date_week_number | arrival_date_day_of_month | stays_in_weekend_nights | stays_in_week_nights | adults | ... | deposit_type | agent | company | days_in_waiting_list | customer_type | adr | required_car_parking_spaces | total_of_special_requests | reservation_status | reservation_status_date | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Resort Hotel | 0 | 342 | 2015 | July | 27 | 1 | 0 | 0 | 2 | ... | No Deposit | 0.0 | 0.0 | 0 | Transient | 0.0 | 0 | 0 | Check-Out | 7/1/2015 |
| 1 | Resort Hotel | 0 | 737 | 2015 | July | 27 | 1 | 0 | 0 | 2 | ... | No Deposit | 0.0 | 0.0 | 0 | Transient | 0.0 | 0 | 0 | Check-Out | 7/1/2015 |
| 2 | Resort Hotel | 0 | 7 | 2015 | July | 27 | 1 | 0 | 1 | 1 | ... | No Deposit | 0.0 | 0.0 | 0 | Transient | 75.0 | 0 | 0 | Check-Out | 7/2/2015 |
| 3 | Resort Hotel | 0 | 13 | 2015 | July | 27 | 1 | 0 | 1 | 1 | ... | No Deposit | 304.0 | 0.0 | 0 | Transient | 75.0 | 0 | 0 | Check-Out | 7/2/2015 |
| 4 | Resort Hotel | 0 | 14 | 2015 | July | 27 | 1 | 0 | 2 | 2 | ... | No Deposit | 240.0 | 0.0 | 0 | Transient | 98.0 | 0 | 1 | Check-Out | 7/3/2015 |
5 rows × 32 columns
data_resort.columns
Index(['hotel', 'is_canceled', 'lead_time', 'arrival_date_year',
'arrival_date_month', 'arrival_date_week_number',
'arrival_date_day_of_month', 'stays_in_weekend_nights',
'stays_in_week_nights', 'adults', 'children', 'babies', 'meal',
'country', 'market_segment', 'distribution_channel',
'is_repeated_guest', 'previous_cancellations',
'previous_bookings_not_canceled', 'reserved_room_type',
'assigned_room_type', 'booking_changes', 'deposit_type', 'agent',
'company', 'days_in_waiting_list', 'customer_type', 'adr',
'required_car_parking_spaces', 'total_of_special_requests',
'reservation_status', 'reservation_status_date'],
dtype='object')
rush_resort=data_resort['arrival_date_month'].value_counts().reset_index()
rush_resort.columns=['month','no of guests']
rush_resort
| month | no of guests | |
|---|---|---|
| 0 | August | 3257 |
| 1 | July | 3137 |
| 2 | October | 2575 |
| 3 | March | 2571 |
| 4 | April | 2550 |
| 5 | May | 2535 |
| 6 | February | 2308 |
| 7 | September | 2102 |
| 8 | June | 2037 |
| 9 | December | 2014 |
| 10 | November | 1975 |
| 11 | January | 1866 |
rush_city=data_city['arrival_date_month'].value_counts().reset_index()
rush_city.columns=['month','no of guests']
rush_city
| month | no of guests | |
|---|---|---|
| 0 | August | 5367 |
| 1 | July | 4770 |
| 2 | May | 4568 |
| 3 | June | 4358 |
| 4 | October | 4326 |
| 5 | September | 4283 |
| 6 | March | 4049 |
| 7 | April | 4010 |
| 8 | February | 3051 |
| 9 | November | 2676 |
| 10 | December | 2377 |
| 11 | January | 2249 |
final_rush = rush_resort.merge(rush_city, on = 'month')
final_rush.columns = ['month', 'no of guests in resort', 'no of guests in city']
final_rush
| month | no of guests in resort | no of guests in city | |
|---|---|---|---|
| 0 | August | 3257 | 5367 |
| 1 | July | 3137 | 4770 |
| 2 | October | 2575 | 4326 |
| 3 | March | 2571 | 4049 |
| 4 | April | 2550 | 4010 |
| 5 | May | 2535 | 4568 |
| 6 | February | 2308 | 3051 |
| 7 | September | 2102 | 4283 |
| 8 | June | 2037 | 4358 |
| 9 | December | 2014 | 2377 |
| 10 | November | 1975 | 2676 |
| 11 | January | 1866 | 2249 |
final_rush = sd.Sort_Dataframeby_Month(df =final_rush, monthcolumnname = 'month')
final_rush
| month | no of guests in resort | no of guests in city | |
|---|---|---|---|
| 0 | January | 1866 | 2249 |
| 1 | February | 2308 | 3051 |
| 2 | March | 2571 | 4049 |
| 3 | April | 2550 | 4010 |
| 4 | May | 2535 | 4568 |
| 5 | June | 2037 | 4358 |
| 6 | July | 3137 | 4770 |
| 7 | August | 3257 | 5367 |
| 8 | September | 2102 | 4283 |
| 9 | October | 2575 | 4326 |
| 10 | November | 1975 | 2676 |
| 11 | December | 2014 | 2377 |
final_rush.dtypes
month object no of guests in resort int64 no of guests in city int64 dtype: object
px.line(data_frame =final_rush, x = 'month',
y = ['no of guests in resort', 'no of guests in city'],
title = 'Total no og gauest per months')
data.head()
| hotel | is_canceled | lead_time | arrival_date_year | arrival_date_month | arrival_date_week_number | arrival_date_day_of_month | stays_in_weekend_nights | stays_in_week_nights | adults | ... | deposit_type | agent | company | days_in_waiting_list | customer_type | adr | required_car_parking_spaces | total_of_special_requests | reservation_status | reservation_status_date | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Resort Hotel | 0 | 342 | 2015 | July | 27 | 1 | 0 | 0 | 2 | ... | No Deposit | 0.0 | 0.0 | 0 | Transient | 0.0 | 0 | 0 | Check-Out | 7/1/2015 |
| 1 | Resort Hotel | 0 | 737 | 2015 | July | 27 | 1 | 0 | 0 | 2 | ... | No Deposit | 0.0 | 0.0 | 0 | Transient | 0.0 | 0 | 0 | Check-Out | 7/1/2015 |
| 2 | Resort Hotel | 0 | 7 | 2015 | July | 27 | 1 | 0 | 1 | 1 | ... | No Deposit | 0.0 | 0.0 | 0 | Transient | 75.0 | 0 | 0 | Check-Out | 7/2/2015 |
| 3 | Resort Hotel | 0 | 13 | 2015 | July | 27 | 1 | 0 | 1 | 1 | ... | No Deposit | 304.0 | 0.0 | 0 | Transient | 75.0 | 0 | 0 | Check-Out | 7/2/2015 |
| 4 | Resort Hotel | 0 | 14 | 2015 | July | 27 | 1 | 0 | 2 | 2 | ... | No Deposit | 240.0 | 0.0 | 0 | Transient | 98.0 | 0 | 1 | Check-Out | 7/3/2015 |
5 rows × 32 columns
filter = data['is_canceled'] == 0
clean_data = data[filter]
clean_data.head()
| hotel | is_canceled | lead_time | arrival_date_year | arrival_date_month | arrival_date_week_number | arrival_date_day_of_month | stays_in_weekend_nights | stays_in_week_nights | adults | ... | deposit_type | agent | company | days_in_waiting_list | customer_type | adr | required_car_parking_spaces | total_of_special_requests | reservation_status | reservation_status_date | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Resort Hotel | 0 | 342 | 2015 | July | 27 | 1 | 0 | 0 | 2 | ... | No Deposit | 0.0 | 0.0 | 0 | Transient | 0.0 | 0 | 0 | Check-Out | 7/1/2015 |
| 1 | Resort Hotel | 0 | 737 | 2015 | July | 27 | 1 | 0 | 0 | 2 | ... | No Deposit | 0.0 | 0.0 | 0 | Transient | 0.0 | 0 | 0 | Check-Out | 7/1/2015 |
| 2 | Resort Hotel | 0 | 7 | 2015 | July | 27 | 1 | 0 | 1 | 1 | ... | No Deposit | 0.0 | 0.0 | 0 | Transient | 75.0 | 0 | 0 | Check-Out | 7/2/2015 |
| 3 | Resort Hotel | 0 | 13 | 2015 | July | 27 | 1 | 0 | 1 | 1 | ... | No Deposit | 304.0 | 0.0 | 0 | Transient | 75.0 | 0 | 0 | Check-Out | 7/2/2015 |
| 4 | Resort Hotel | 0 | 14 | 2015 | July | 27 | 1 | 0 | 2 | 2 | ... | No Deposit | 240.0 | 0.0 | 0 | Transient | 98.0 | 0 | 1 | Check-Out | 7/3/2015 |
5 rows × 32 columns
clean_data.columns
Index(['hotel', 'is_canceled', 'lead_time', 'arrival_date_year',
'arrival_date_month', 'arrival_date_week_number',
'arrival_date_day_of_month', 'stays_in_weekend_nights',
'stays_in_week_nights', 'adults', 'children', 'babies', 'meal',
'country', 'market_segment', 'distribution_channel',
'is_repeated_guest', 'previous_cancellations',
'previous_bookings_not_canceled', 'reserved_room_type',
'assigned_room_type', 'booking_changes', 'deposit_type', 'agent',
'company', 'days_in_waiting_list', 'customer_type', 'adr',
'required_car_parking_spaces', 'total_of_special_requests',
'reservation_status', 'reservation_status_date'],
dtype='object')
clean_data['total_nights_spend'] = clean_data['stays_in_weekend_nights'] + clean_data['stays_in_week_nights']
C:\Users\abhis\AppData\Local\Temp\ipykernel_10072\1100290991.py:1: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
clean_data.head()
| hotel | is_canceled | lead_time | arrival_date_year | arrival_date_month | arrival_date_week_number | arrival_date_day_of_month | stays_in_weekend_nights | stays_in_week_nights | adults | ... | agent | company | days_in_waiting_list | customer_type | adr | required_car_parking_spaces | total_of_special_requests | reservation_status | reservation_status_date | total_nights_spend | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Resort Hotel | 0 | 342 | 2015 | July | 27 | 1 | 0 | 0 | 2 | ... | 0.0 | 0.0 | 0 | Transient | 0.0 | 0 | 0 | Check-Out | 7/1/2015 | 0 |
| 1 | Resort Hotel | 0 | 737 | 2015 | July | 27 | 1 | 0 | 0 | 2 | ... | 0.0 | 0.0 | 0 | Transient | 0.0 | 0 | 0 | Check-Out | 7/1/2015 | 0 |
| 2 | Resort Hotel | 0 | 7 | 2015 | July | 27 | 1 | 0 | 1 | 1 | ... | 0.0 | 0.0 | 0 | Transient | 75.0 | 0 | 0 | Check-Out | 7/2/2015 | 1 |
| 3 | Resort Hotel | 0 | 13 | 2015 | July | 27 | 1 | 0 | 1 | 1 | ... | 304.0 | 0.0 | 0 | Transient | 75.0 | 0 | 0 | Check-Out | 7/2/2015 | 1 |
| 4 | Resort Hotel | 0 | 14 | 2015 | July | 27 | 1 | 0 | 2 | 2 | ... | 240.0 | 0.0 | 0 | Transient | 98.0 | 0 | 1 | Check-Out | 7/3/2015 | 2 |
5 rows × 33 columns
stay = clean_data.groupby(['total_nights_spend', 'hotel']).agg('count').reset_index()
stay = stay.iloc[:, 0:3]
stay.head()
| total_nights_spend | hotel | is_canceled | |
|---|---|---|---|
| 0 | 0 | City Hotel | 251 |
| 1 | 0 | Resort Hotel | 371 |
| 2 | 1 | City Hotel | 9155 |
| 3 | 1 | Resort Hotel | 6579 |
| 4 | 2 | City Hotel | 10983 |
stay = stay.rename(columns = {'is_canceled': 'Number of stay'})
stay.head()
| total_nights_spend | hotel | Number of stay | |
|---|---|---|---|
| 0 | 0 | City Hotel | 251 |
| 1 | 0 | Resort Hotel | 371 |
| 2 | 1 | City Hotel | 9155 |
| 3 | 1 | Resort Hotel | 6579 |
| 4 | 2 | City Hotel | 10983 |
plt.figure(figsize=(20, 8))
sns.barplot(x = 'total_nights_spend', y = 'Number of stay', hue = 'hotel',
hue_order = ['City Hotel', 'Resort Hotel'], data = stay)
<Axes: xlabel='total_nights_spend', ylabel='Number of stay'>
# Bookings by market segment
clean_data['market_segment'].value_counts()
market_segment Online TA 35673 Offline TA/TO 15880 Direct 10648 Groups 7697 Corporate 4291 Complementary 639 Aviation 183 Name: count, dtype: int64
fig = px.pie(clean_data,
values = clean_data['market_segment'].value_counts().values,
names = clean_data['market_segment'].value_counts().index,
title = "Booking per market segment")
fig.update_traces(rotation=-90, textinfo="percent+label")
fig.show()
total_cancellations = df['is_canceled'].sum()
print(total_cancellations)
44224
cancellations_by_month = df[df['is_canceled'] == 1].groupby('arrival_date_month').size()
month_with_most_cancellations = cancellations_by_month.idxmax()
print(month_with_most_cancellations)
August